{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training\n",
    "Using the Naive Bayes classificator to predict if a word is an answer or not."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Common imports \n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pickling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import _pickle as cPickle\n",
    "from pathlib import Path\n",
    "\n",
    "def dumpPickle(fileName, content):\n",
    "    pickleFile = open(fileName, 'wb')\n",
    "    cPickle.dump(content, pickleFile, -1)\n",
    "    pickleFile.close()\n",
    "\n",
    "def loadPickle(fileName):    \n",
    "    file = open(fileName, 'rb')\n",
    "    content = cPickle.load(file)\n",
    "    file.close()\n",
    "    \n",
    "    return content\n",
    "    \n",
    "def pickleExists(fileName):\n",
    "    file = Path(fileName)\n",
    "    \n",
    "    if file.is_file():\n",
    "        return True\n",
    "    \n",
    "    return False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reading the dataframe\n",
    "Generated from the *Feature Engineering* notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "wordPickleName = '../data/pickles/wordsDf.pkl'\n",
    "df = loadPickle(wordPickleName)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>isAnswer</th>\n",
       "      <th>titleId</th>\n",
       "      <th>paragrapghId</th>\n",
       "      <th>sentenceId</th>\n",
       "      <th>wordCount</th>\n",
       "      <th>NER</th>\n",
       "      <th>POS</th>\n",
       "      <th>TAG</th>\n",
       "      <th>DEP</th>\n",
       "      <th>shape</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Architecturally</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>ADV</td>\n",
       "      <td>RB</td>\n",
       "      <td>advmod</td>\n",
       "      <td>Xxxxx</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>school</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>NOUN</td>\n",
       "      <td>NN</td>\n",
       "      <td>nsubj</td>\n",
       "      <td>xxxx</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Catholic</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>NORP</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>Xxxxx</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>character</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>NOUN</td>\n",
       "      <td>NN</td>\n",
       "      <td>dobj</td>\n",
       "      <td>xxxx</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Atop</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>ADP</td>\n",
       "      <td>IN</td>\n",
       "      <td>prep</td>\n",
       "      <td>Xxxx</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              text  isAnswer  titleId  paragrapghId  sentenceId  wordCount  \\\n",
       "0  Architecturally     False        0             0         0.0          1   \n",
       "1           school     False        0             0         0.0          1   \n",
       "2         Catholic     False        0             0         0.0          1   \n",
       "3        character     False        0             0         0.0          1   \n",
       "4             Atop     False        0             0         1.0          1   \n",
       "\n",
       "    NER   POS   TAG     DEP   shape  \n",
       "0  None   ADV    RB  advmod   Xxxxx  \n",
       "1  None  NOUN    NN   nsubj    xxxx  \n",
       "2  NORP  None  None    None   Xxxxx  \n",
       "3  None  NOUN    NN    dobj    xxxx  \n",
       "4  None   ADP    IN    prep    Xxxx  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One-hot encoding\n",
    "We need to encode the categorical data - NER, POS, TAG, DEP, shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "columnsToEncode = ['NER', 'POS', \"TAG\", 'DEP']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NER\n",
      "POS\n",
      "TAG\n",
      "DEP\n"
     ]
    }
   ],
   "source": [
    "for column in columnsToEncode:\n",
    "    print(column)\n",
    "    one_hot = pd.get_dummies(df[column])\n",
    "    one_hot = one_hot.add_prefix(column + '_')\n",
    "\n",
    "    df = df.drop(column, axis = 1)\n",
    "    df = df.join(one_hot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>text</th>\n",
       "      <th>isAnswer</th>\n",
       "      <th>titleId</th>\n",
       "      <th>paragrapghId</th>\n",
       "      <th>sentenceId</th>\n",
       "      <th>wordCount</th>\n",
       "      <th>shape</th>\n",
       "      <th>NER_CARDINAL</th>\n",
       "      <th>NER_DATE</th>\n",
       "      <th>NER_EVENT</th>\n",
       "      <th>...</th>\n",
       "      <th>DEP_nummod</th>\n",
       "      <th>DEP_oprd</th>\n",
       "      <th>DEP_parataxis</th>\n",
       "      <th>DEP_pcomp</th>\n",
       "      <th>DEP_pobj</th>\n",
       "      <th>DEP_poss</th>\n",
       "      <th>DEP_predet</th>\n",
       "      <th>DEP_prep</th>\n",
       "      <th>DEP_relcl</th>\n",
       "      <th>DEP_xcomp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Architecturally</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Xxxxx</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>school</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>xxxx</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Catholic</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Xxxxx</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>character</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>xxxx</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Atop</td>\n",
       "      <td>False</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>Xxxx</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 83 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              text  isAnswer  titleId  paragrapghId  sentenceId  wordCount  \\\n",
       "0  Architecturally     False        0             0         0.0          1   \n",
       "1           school     False        0             0         0.0          1   \n",
       "2         Catholic     False        0             0         0.0          1   \n",
       "3        character     False        0             0         0.0          1   \n",
       "4             Atop     False        0             0         1.0          1   \n",
       "\n",
       "    shape  NER_CARDINAL  NER_DATE  NER_EVENT  ...  DEP_nummod  DEP_oprd  \\\n",
       "0   Xxxxx             0         0          0  ...           0         0   \n",
       "1    xxxx             0         0          0  ...           0         0   \n",
       "2   Xxxxx             0         0          0  ...           0         0   \n",
       "3    xxxx             0         0          0  ...           0         0   \n",
       "4    Xxxx             0         0          0  ...           0         0   \n",
       "\n",
       "   DEP_parataxis  DEP_pcomp  DEP_pobj  DEP_poss  DEP_predet  DEP_prep  \\\n",
       "0              0          0         0         0           0         0   \n",
       "1              0          0         0         0           0         0   \n",
       "2              0          0         0         0           0         0   \n",
       "3              0          0         0         0           0         0   \n",
       "4              0          0         0         0           0         1   \n",
       "\n",
       "   DEP_relcl  DEP_xcomp  \n",
       "0          0          0  \n",
       "1          0          0  \n",
       "2          0          0  \n",
       "3          0          0  \n",
       "4          0          0  \n",
       "\n",
       "[5 rows x 83 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Remove columns\n",
    "We need to remove the columns that are not features of the words."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "columnsToDrop = ['text', 'titleId', 'paragrapghId', 'sentenceId', 'shape']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.drop(columnsToDrop, axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>isAnswer</th>\n",
       "      <th>wordCount</th>\n",
       "      <th>NER_CARDINAL</th>\n",
       "      <th>NER_DATE</th>\n",
       "      <th>NER_EVENT</th>\n",
       "      <th>NER_FAC</th>\n",
       "      <th>NER_GPE</th>\n",
       "      <th>NER_LANGUAGE</th>\n",
       "      <th>NER_LAW</th>\n",
       "      <th>NER_LOC</th>\n",
       "      <th>...</th>\n",
       "      <th>DEP_nummod</th>\n",
       "      <th>DEP_oprd</th>\n",
       "      <th>DEP_parataxis</th>\n",
       "      <th>DEP_pcomp</th>\n",
       "      <th>DEP_pobj</th>\n",
       "      <th>DEP_poss</th>\n",
       "      <th>DEP_predet</th>\n",
       "      <th>DEP_prep</th>\n",
       "      <th>DEP_relcl</th>\n",
       "      <th>DEP_xcomp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 78 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   isAnswer  wordCount  NER_CARDINAL  NER_DATE  NER_EVENT  NER_FAC  NER_GPE  \\\n",
       "0     False          1             0         0          0        0        0   \n",
       "1     False          1             0         0          0        0        0   \n",
       "2     False          1             0         0          0        0        0   \n",
       "3     False          1             0         0          0        0        0   \n",
       "4     False          1             0         0          0        0        0   \n",
       "\n",
       "   NER_LANGUAGE  NER_LAW  NER_LOC  ...  DEP_nummod  DEP_oprd  DEP_parataxis  \\\n",
       "0             0        0        0  ...           0         0              0   \n",
       "1             0        0        0  ...           0         0              0   \n",
       "2             0        0        0  ...           0         0              0   \n",
       "3             0        0        0  ...           0         0              0   \n",
       "4             0        0        0  ...           0         0              0   \n",
       "\n",
       "   DEP_pcomp  DEP_pobj  DEP_poss  DEP_predet  DEP_prep  DEP_relcl  DEP_xcomp  \n",
       "0          0         0         0           0         0          0          0  \n",
       "1          0         0         0           0         0          0          0  \n",
       "2          0         0         0           0         0          0          0  \n",
       "3          0         0         0           0         0          0          0  \n",
       "4          0         0         0           0         1          0          0  \n",
       "\n",
       "[5 rows x 78 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split to test and train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "x_data = df.drop(labels=['isAnswer'], axis=1)\n",
    "y_data = df['isAnswer']\n",
    "\n",
    "x_train, x_test, y_train, y_test = train_test_split(x_data, \n",
    "                                                    y_data, \n",
    "                                                    test_size=0.1, \n",
    "                                                    random_state=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7810\n",
      "7810\n",
      "868\n",
      "868\n"
     ]
    }
   ],
   "source": [
    "print(len(x_train))\n",
    "print(len(y_train))\n",
    "print(len(x_test))\n",
    "print(len(y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5750    False\n",
       "6978    False\n",
       "4648    False\n",
       "2742    False\n",
       "3136    False\n",
       "Name: isAnswer, dtype: bool"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.naive_bayes import GaussianNB\n",
    "\n",
    "gnb = GaussianNB()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictor = gnb.fit(x_train, y_train)\n",
    "y_pred = predictor.predict(x_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Show Accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Correctly guessed: 28.46%\n"
     ]
    }
   ],
   "source": [
    "correctCount = (y_test == y_pred).sum()\n",
    "\n",
    "print('Correctly guessed:', '{:.2f}%'.format((correctCount / len(y_test)) * 100))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "That doesn't really tell us anything. Since our dataset is not balanced - only 02,53%  of the words are answers we need to use a different metric.  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Confusion matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[209, 619],\n",
       "       [  2,  38]], dtype=int64)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix\n",
    "\n",
    "confusion_matrix(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prettier confusion matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "def plot_confusion_matrix(y_true, y_pred, classes,\n",
    "                          normalize=False,\n",
    "                          title='Confusion matrix',\n",
    "                          cmap=plt.cm.Blues,\n",
    "                          figsize=(9, 7)):\n",
    "    matrix = confusion_matrix(y_true, y_pred)\n",
    "\n",
    "    if normalize:\n",
    "        matrix = matrix.astype('float') / matrix.sum(axis=1)[:, np.newaxis]\n",
    "\n",
    "    plt.figure(figsize=figsize)\n",
    "    plt.imshow(matrix, interpolation='nearest', cmap=cmap)\n",
    "    plt.title(title)\n",
    "    plt.colorbar()\n",
    "\n",
    "    tick_marks = np.arange(len(classes))\n",
    "    plt.xticks(tick_marks, classes, rotation=45)\n",
    "    plt.yticks(tick_marks, classes)\n",
    "\n",
    "    fmt = '.2f' if normalize else 'd'\n",
    "    thresh = matrix.max() / 2.\n",
    "    for i, j in itertools.product(range(matrix.shape[0]), range(matrix.shape[1])):\n",
    "        plt.text(j, i, format(matrix[i, j], fmt),\n",
    "                 horizontalalignment=\"center\",\n",
    "                 size=int((figsize[0] / 10) * 38),\n",
    "                 color=\"white\" if matrix[i, j] > thresh else \"black\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.ylabel('True label')\n",
    "    plt.xlabel('Predicted label')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVkAAAEmCAYAAADIhuPPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XeYVEXWx/Hvj5yDgEhQRAUzoiAiQVExYMSIigiIophlTauuWXHdNYc1Z8WAYvY1oCioKKIYMGFCUJAgWTLn/aNqsIeZ6Wlmpqe7h/Px6We6762+t247nKmuW3VKZoZzzrn0qJTpCjjnXEXmQdY559LIg6xzzqWRB1nnnEsjD7LOOZdGHmSdcy6NPMi6CkNSTUkvSZov6ZlSHKevpDfKsm6ZIqm7pO8yXY/1mXycrCtvko4FhgJbAQuBicA1Zja2lMftB5wBdDGzlaWuaJaTZEAbM/sh03VxRfOWrCtXkoYCNwPXAk2BTYA7gUPK4PCtgO/XhwCbCklVMl0HB5iZP/xRLg+gPrAIODJJmeqEIPx7fNwMVI/7egDTgH8AM4HpwMC47wpgObAinmMQcDnwWMKxNwUMqBJfDwB+IrSmfwb6Jmwfm/C+LsB4YH782SVh32jgKuD9eJw3gMZFXFte/c9PqH9vYH/ge+BP4KKE8p2AD4F5seztQLW47714LYvj9fZJOP4FwAzg0bxt8T2bx3PsFF83B2YDPTL9u1GRH96SdeVpV6AGMDJJmYuBzkB7YAdCoLkkYf9GhGDdghBI75DU0MwuI7SOnzKzOmZ2f7KKSKoN3Ar0MrO6hEA6sZByGwCvxLKNgBuBVyQ1Sih2LDAQ2BCoBpyb5NQbET6DFsClwL3AcUAHoDtwqaTNYtlVwDlAY8JntxdwKoCZ7RbL7BCv96mE429AaNUPTjyxmf1ICMCPS6oFPAg8ZGajk9TXlZIHWVeeGgGzLfnX+b7AlWY208xmEVqo/RL2r4j7V5jZq4RW3JYlrM9qYDtJNc1suplNKqTMAcBkM3vUzFaa2XDgW+CghDIPmtn3ZrYEeJrwB6IoKwj9zyuAJwkB9BYzWxjPPwloB2BmE8xsXDzvL8DdwO4pXNNlZrYs1icfM7sXmAx8BDQj/FFzaeRB1pWnOUDjYvoKmwNTEl5PidvWHGOtIP0XUGddK2JmiwlfsU8Bpkt6RdJWKdQnr04tEl7PWIf6zDGzVfF5XhD8I2H/krz3S2or6WVJMyQtILTUGyc5NsAsM1taTJl7ge2A28xsWTFlXSl5kHXl6UNgKaEfsii/E77q5tkkbiuJxUCthNcbJe40s9fNbG9Ci+5bQvAprj55dfqthHVaF/8j1KuNmdUDLgJUzHuSDheSVIfQz30/cHnsDnFp5EHWlRszm0/oh7xDUm9JtSRVldRL0vWx2HDgEklNJDWO5R8r4SknArtJ2kRSfeCfeTskNZV0cOybXUbodlhVyDFeBdpKOlZSFUl9gG2Al0tYp3VRF1gALIqt7CFr7f8D2KzAu5K7BZhgZicS+prvKnUtXVIeZF25MrMbCWNkLwFmAVOB04HnY5GrgU+AL4AvgU/jtpKc603gqXisCeQPjJUIoxR+J9xx3514U2mtY8wBDoxl5xBGBhxoZrNLUqd1dC7hptpCQiv7qbX2Xw48LGmepKOKO5ikQ4D9CF0kEP4/7CSpb5nV2BXgkxGccy6NvCXrnHNp5EHWOefSyIOsc86lkQdZ55xLI08gsZ6o13ADa9Js40xXo0L5+Zfpma5ChWNLZs02syaplq9cr5XZygIT29Y+5utmtl+pK1dCHmTXE02abcy/n3gt09WoUPoNvDbTVahwlk68Y+3ZdUnZyiVU3zL56LWlE+8obpZcWnl3gXMud0lQqXLyR7GHUANJIyR9K+kbSbtK2kDSm5Imx58NY1lJulXSD5K+kLRTccf3IOucy22qlPxRvFuA/zOz
rQiZ374BLgRGmVkbYFR8DdALaBMfgwlTn5PyIOucy2Gla8lKqgfsRsjlgJktN7N5hCTyD8diD/N3vo1DgEcsGAc0kNQs2Tk8yDrncpuU/JHcZoTp3Q9K+kzSfTGfRVMzmw4Qf24Yy7cgTAXPM438GdkK8CDrnMtdqfXJNpb0ScIjMZl5FWAn4H9mtiMhc9uFhZxpzRkL2ZY0N4GPLnDO5bbi+11nm1nHIvZNIyzP81F8PYIQZP+Q1MzMpsfugJkJ5RPHQrakmFSc3pJ1zuWw0vXJmtkMYKqkvNU19gK+Bl4E+sdt/YEX4vMXgePjKIPOwPy8boWieEvWOZe7RCr9rsU5g7DuWTXCwpoDCQ3QpyUNAn4FjoxlXyUsfPkDYRWMgcUd3IOscy6HCSqVLoyZ2USgsO6EvQopa8Bp63J8D7LOudxWqdQt2bTyIOucy10ipVldmeRB1jmXw5TqrK6M8SDrnMtt3pJ1zrk0SW1WV0Z5kHXO5TZvyTrnXLrIg6xzzqWN8BtfzjmXPt6Sdc659PKWrHPOpYm8Jeucc+nlQ7iccy49BFSq5N0FzjmXHqLwtQqyiAdZ51wOk7dknXMuneR9ss45lyYCeT5Z55xLDyFvyTrnXDp5n6xbr8yZOZ0P33iJT8eM4vcpPzJvzmzqNWzENh06c9igM9hki60KvOebzz7mmbtv5IdJE7HVq9l8m3YcftLZbN+pW6HnePelZ3j9mYf5dfK3qFIlWrXZmoP6ncwue+2f7svLuGMP7MQJh3ZhuzYtqFy5Er9O/5MxEyZz9rCn15TZf7ft2HvXrdlpm03Yvm0LataoxkmXPspjL31U5HH367YtZ/Xbkx233oSqVSrz3S9/cO8zY3hw5AflcVmlUtqWrKRfgIXAKmClmXWUtAHwFLAp8AtwlJnNVTjZLYTFFP8CBpjZp8mOn91/AlzOeW34gzx8wxXMnvEb7bvuyYHHDab1VtvywesvcGHf/flq/Pv5yk/8YDSXn3QEP076nG779WbP3kfz2y8/ctWQYxg/+o0Cx3/g+ku5/dKzmfPHdHY78Ai6738YM3+fxn/PPYmXH7unnK6y/FWqJB66dgD3X3U8tWpW5+EXPuTeZ8bw/S9/cPjeO+Ure1a/vTjl6N3ZotWG/DFnQbHHPqPvHoy8bQjttmzJyFGf8dDzH1CnVnXuvPRY/nve4em6pLIR+2STPVK0h5m1N7O8BRUvBEaZWRtgVHwN0AtoEx+Dgf8Vd2Bvyboy1Wa79lx5/3NsvdMu+bZ/+OZL3Hj+Kdw37CJufu5dAFasWM49V19AlapVufqh59l48y0B6D3gNM47eh/uveZC2nXuTvUaNQH4YdJEXht+P81abcawR1+mdt36ABxz+gX8s9+BPH7rMHbeYz+attikHK+4fAzt35M+vTpy4Y3Pccujb+fbV7ly/rbSFXe8xIw5C/hp6mxO6bMbN114VJHHbd6kPledeTBz5i2m89HDmPbHPABqVK/Kq3edzmnH7sGzb3zGh5//VPYXVQbS2Cd7CNAjPn8YGA1cELc/EletHSepgaRmZja9qAN5S9aVqV322r9AgAXYde+DaNZqM377+QcWzP0TgC/HjWHW9Gl073XYmgAL0LBJU3odPZC5s//gs7F/B5RPYsv2gGNPXBNgAerWb8gBxwxi5YrlvPP8k+m6tIypVaMa5w7ch3fHf18gwAKsWrU63+sPJv7ET1Nnp3TsvbtuQ/VqVXlo5AdrAizA0mUr+O+DbwJw4hGFd9tkixRaso0lfZLwGLzWIQx4Q9KEhH1N8wJn/Llh3N4CmJrw3mlxW5G8JevKTZUqVQGoXCUk9Pj609BH2K5z9wJl23XejSfv/A9fTxhH554HADBvziwANmy+cYHyTZq3BGDShA/LvuIZ1nPXrahftybPj5pI3do1OLDH9rRo2pDps+bzxthJzJq7qMTHbtqoHgBTps8psG/K7+GP4W4d25T4+GmnlPpkZyd0AxSmq5n9LmlD
4E1J3yY/YwGW7OQeZF25+PHrL5j643dsvs0Oa1qhM6b+DMBGG7cuUH6jjTeNZX5Zs61ugw0AmDV9WoHys34P26ZPyc6vtaWx49ah+6NBvVp8PvJfNGvydyt+0V/LOOPq4Tz52iclOvaceSFAb9JsgwL7WjUP21pu1JCaNaqyZOmKEp0j3Uo7usDMfo8/Z0oaCXQC/sjrBpDUDJgZi08DEv/KtwR+T1q/UtXOuRQs+Wsxd1x2DpLoe9ZFf29fHP6B16xdp8B7atauC8Bfi/6+cdO+Sw8AXnniPhYv/Hv74oXzeXX4/fF58Td6ck2jhuHzuXhwLz79+lfa9b6Spt3Ppd8FD7Bi5SruuaIf7dom/cZapFHjvmXVqtUM6N2F5gnBu3q1Kgzt33PN6/p1apbuItIkr0822SPp+6XakurmPQf2Ab4CXgT6x2L9gRfi8xeB4xV0BuYn648Fb8m6NFuxYjk3nncyU3/4lqNOOTffsKxw76Dwr3uFbdu2465063UoY18byT+O3JOOu++DmfHJu29Qp35DACpVrnjthkrxs5j550KOu+ABli4LLcoRb3xK/bo1uf2SYxhyzO4MueKJdT72L7/N4YaH3uT8Qfvy8dMX8cLbE1mydDl7d9mGmtWrMnfBXzSsV6tAv2/WKP2Mr6bAyPj7VgV4wsz+T9J44GlJg4BfgSNj+VcJw7d+IAzhGljcCTzIurRZtXIlN10whIkfvMNB/U7myJPPybe/Vp281urCAu/Na8HWqlMv3/bTr7qFzbbenndeeIq3n3+SGrVqs3OPfek94FTO7N2deg0apelqMmfBoiUAvP3Rd2sCbJ5X3/sK+LtLoSQuu/0lJk+ZySl9duOY/Xdm2YqVvD3uW/5500jGP30RK1euYu7Cv0p+AWlWmtEFZvYTsEMh2+cAexWy3YDT1uUcHmRdWqxauZKb/3kq49/5P3odfQLHD720QJm8vtgZU39ms623z7cvry82r282T+XKlTmo38kc1O/kfNu/njAOgM22aVdGV5A9Jk8J3YF5wTbR/IVhW83qVUt1jsde+qjAZIWNN2pIvTo1+eybqaxcmaUtWbI/d0HWfLeSNEBS80zXY22S2kuq+FOJytCqVau47ZIzGffWK+x9RD9OuOCqQsttE4d6fTFuTIF9X4x7D6DQ4WCFGfvaSAC67HNQSaqc1d6bMBmALVtvVGDfVpuFbVNnzC3z8/bpFW7IP/vGhDI/dlkqTZ9seciaIAsMAMo0yEqqvNZrSeu86lp7Qh9MVpOUFd9KVq9ezR2Xnc37r7/Anr2P4aSLhhVZdvvO3Wm8UQvGvPYcU3/8bs32ubP+4LUnH6Rh46bs1D3/N7bCuhY+fuf/GPX8cDbbuh2d9zqg7C4mS/z46yzeHf89e3Rqm284VZUqlbj45PCr+fyoiSU+ft3aNQps26Vda84ftC9Tp//JPc+MLfGx000K+WSTPTItLf8wJW0KvAaMBboAvwGHmNkSSe2Bu4BawI/ACYS+j47A45KWALua2ZKE420R39OEML/4SOAn4HrCNDcDrjazpyT1AC4DpgN5rdDXgHeAXYHekrYErgCqxzoMNLNFknYmzEuuDSwD9gauBGpK6gYMM7On1rrOR2N5gNPN7INYh8uB2cB2wATgODMzSdcBBwMrgTcIs0gmA5sD9YE/gR5m9p6kMYSO9enAbcD2hP9nl5vZC5IGAAcANWId9kzxf1HaPHP3jYx55Tlq161PwyZNeebuGwuUOaBvmExQtWo1Bl/yb647qz+XDOhN1/0OoWq1anzwxkssmPcn5/33vjWzvfLccN5gVixfTqs2W1O9Zi1++vpzvvx4LE2atWTof+6mcpWs+FtT5s689ineeWgoL915Gs+PmsiM2Qvo0akt7dq2ZPTH3+XLMXBQj3YctEfoNtly06YADDy0y5oA/dI7X/DS6C/WlL9u6KHssGVLJnz9K/MW/MXWmzejV7dtmb9oKX3+cS8LFy8txytdd9nQ
Wk0mnb+RbYBjzOwkSU8DhwOPAY8AZ5jZu5KuBC4zs7MlnQ6ca2aFDfh7HLjOzEZKqkFogR9GaGXuADQGxkt6L5bvBGxnZj/HQLglIZCeKqkxcAnQ08wWS7oAGBqD31NAHzMbL6ke4e7hpUBHMzu9kHrNBPY2s6WS2gDDCX8sAHYEtiWMoXsf6Crpa+BQYKsYcBuY2SpJ3wPbAK0JAbm7pI+Almb2g6RrgbfN7ARJDYCPJb0Vz7Mr0M7M/ly7cnH2ymCAxs1KNsRnXeWNYV28cD7P3ntzoWV6HHzUmrGyO3bdg8vvHcEzd93AmFefw8zYfJsdOPOa2wpNENNpj31554Wnee+VZ1mxfBmNm7Wg98DT6T3w1HyzwCqa73/5g27H/YfLTzuQPXbZknq1a/Dr9Llc9b9X+O+Db+a7+99uy5b0O7hzvvd32XFzuuy4ORAmGSQG2VEffstWrTfiiH12onbNakyfNZ/7Rozl+gfeYPqs+eVzgaWQ7X2yyhtGU6YHDYHtzZhcgRjIqhJaY1+a2SZx++bAM2a2k6TRFBJk4xi2b8ys5Vrbb4rHeiC+fhR4BlhACNx7JNTlHTNrHV8fCDxEGFQMUA34ELgZuMvMuq51ngEUEWQl1QduJwT7VUBbM6sVW7IXm9nesdz/CIH2SUIQ/QR4BXjZzJZLupjQgm0NjANOAq4BzjSzoyR9Qmitroyn3gDYF9gF2N3Mih1Gsvk2O9i/n3ituGJuHfQbeG2mq1DhLJ14x4RiZmflU32jNtay761Jy/x04/7rdMyyls6W7LKE56uAko5mLurPVLI/X4uTvBbhD8Ax+Q4mtaOY6XGFOAf4g9CargQkfq9a+/qrmNlKSZ0I3SNHA6cTvuKPAU4h9ElfCpxHSE6R1zIXcLiZfZdwTCTtQsFrdW69IUSlLG/JlmuvsJnNB+ZKypus3g94Nz5fCNQt5D0LgGmSegNIqi6pFiEA9ZFUWVITYDfg4xSqMY7w1X2LeLxaktoC3wLNY78skurGm0mF1iuqD0w3s9XxWioXUY54zDpAfTN7FTib0AIG+IjQd73azJYCE4GTCcEX4HXgjJjLEkk7pnCdzq0XpOSPTMvErbf+wH8kfUEIMlfG7Q8Bd0maKGntVm8/4Mz4ng+AjYCRwBfA58DbwPlmNqO4k5vZLMJIhuHxeOMIfaTLgT7AbZI+B94kfEV/B9gm1qvPWoe7E+gvaRzQluJblXWBl+N53yW0hDGzZYTMPuNiuTGx7Jfx9VWE7pYvJH0VXzvnFHLtJntkWlr6ZF328T7Zsud9smVvXftkazZra60H3p60zDfD9q2wfbLOOZd22dBaTcaDrHMud2VJv2syHmSdczkrjC7I/KyuZDzIOudymrdknXMuXeR9ss45lzZi/c5d4JxzaectWeecS6Msb8h6kHXO5S7lQJ9sdo99cM65pEq3Wu2ao4QcKJ9Jejm+bi3pI0mTJT0lqVrcXj2+/iHu37S4Y3uQdc7ltDLKXXAW8E3C638DN8V0rXOBQXH7IGCumW0B3BTLJa9fylfinHPZppgMXKk0ZCW1JKwwcl98LUIK0hGxyMNA7/j8kPiauH8vFdNc9j5Z51zOEqQy46txTHyf5x4zuyfh9c3A+fyd0rQRMM/M8pLkTwPylhZpQciYR8wPPT+Wn13UyYsMsnH5lSLFPK/OOZdRKbRWZxeVhSuulDLTzCbEFU2g8AUBLIV9hUrWkp0U35x40LzXBmyS7MDOOZd2pR9d0BU4OC64WgOoR2jZNpBUJbZmWxLW6oPQqt2YsJBAFf5e/LRIRbazzWxjM9sk/tx4rdceYJ1zGadSji4ws3+aWUsz25SwJNTbZtaXkKz/iFisP/BCfP5ifE3c/7YVk5Q7pRtfko6WdFF83lJSh1Te55xz6Va5kpI+SihvFesfCH2u98ft9wON4vahwIXFHajYG1+SbicsfbIbcC1hmey7gJ1LVHXnnCtDZTXjy8xGA6Pj85+AToWUWQoc
uS7HTWV0QZe4ZPdn8SR/5g3Mdc65TJIoTWu1XKQSZFdIqkS8gyapEbA6rbVyzrkUZXsWrlT6ZO8AngWaSLoCGEsKsxyccy7dBFSSkj4yrdiWrJk9ImkC0DNuOtLMvkpvtZxzLjVZ3luQ8oyvysAKQpeBT8V1zmUHrVN+gowoNmBKuhgYDjQnDMp9QtI/010x55wrToXoLgCOAzqY2V8Akq4BJgDD0lkx55xLRba3ZFMJslPWKlcF+Ck91XHOudSlmmkrk5IliLmJ0Af7FzBJ0uvx9T6EEQbOOZdxlbM8yiZryeaNIJgEvJKwfVz6quOcc+sm28fJFhlkzez+ovY551w2kEqVn6BcpJK7YHPgGmAbQiowAMysbRrr5ZxzKcnyhmxKY14fAh4kjJboBTwNPJnGOjnnXEpE2rJwlZlUgmwtM3sdwMx+NLNLgD3SWy3nnEtNWaxWm06pDOFaFhcK+1HSKcBvwIbprZZzzhVPyu3RBXnOAeoAZxL6ZusDJ6SzUs45l6osj7EpJYj5KD5dCPRLb3Wcc27d5OyML0kjSbIKo5kdlpYaOedcikR25CdIJllL9vZyq4VLuwY1q3Lgds0zXY0KZdqYmzNdhQqncd071u0NpVytVlIN4D2gOiEejjCzyyS1Joyi2gD4FOhnZsslVQceAToAc4A+ZvZLsnMkm4wwqsQ1d865clLK3KvLgD3NbJGkqsBYSa8RFkm8ycyelHQXMAj4X/w518y2kHQ0YQGDPmmsn3POZU5px8lasCi+rBofBuwJjIjbHwZ6x+eHxNfE/XupmHFiHmSdczmtkpI/gMaSPkl4DE58v6TKkiYCM4E3gR+BeWa2MhaZBrSIz1sAUwHi/vmEJcOLlOrKCEiqbmbLUi3vnHPpluJqtbPNrGNRO81sFdBeUgNgJLB1YcXyTplkX6FSWRmhk6Qvgcnx9Q6Sbivufc45Vx7ycsoW9UiVmc0DRgOdgQaS8hqhLYHf4/NpwMbhvKpCmDfwZ7LjptJdcCtwIOFOGmb2OT6t1jmXBQRUkZI+kr5fahJbsEiqSVgw9hvgHeCIWKw/8EJ8/mJ8Tdz/tpklbcmm0l1QycymrNW3uyqF9znnXNqVcphsM+BhSZUJjc6nzexlSV8DT0q6GvgMyEv9ej/wqKQfCC3Yo4s7QSpBdqqkToDFipwBfL/u1+Kcc2WrtPlkzewLYMdCtv8EdCpk+1LgyHU5RypBdgihy2AT4A/grbjNOecyLstn1aaUu2AmKTSJnXOuvOWNk81mqayMcC+FDFEws8GFFHfOufJTQVIdvpXwvAZwKHEwrnPOZZKoGN0FTyW+lvQoYVaEc85lXM53FxSiNdCqrCvinHPrqkK0ZCXN5e8+2UqEsWEXprNSzjmXktSm1WZU0iAbs8vsQFjXC2B1cbMbnHOuvORCSzbptNoYUEea2ar48ADrnMsiorKSPzItldwFH0vaKe01cc65dSTKLkFMuiRb46tKzJfYDThJ0o/AYsJ1mZl54HXOZZagSpb3FyTrk/0Y2Im/M4I751xWyWvJZrNkQVYAZvZjOdXFOefWWS6PLmgiaWhRO83sxjTUxznnUiayfw2tZEG2MlCHwpdbcM65zBNUyvL+gmRBdrqZXVluNXHOuXUUxsnmbpDN7po75xzZPxkhWZDdq9xq4ZxzJSKUqy1ZM0u6AqNzzmWayP58stl+Y84555JSMY+k75U2lvSOpG8kTZJ0Vty+gaQ3JU2OPxvG7ZJ0q6QfJH2RymxYD7LOuZyluDJCKXIXrAT+YWZbA52B0yRtQ8g0OMrM2gCj+DvzYC+gTXwMBv5X3Ak8yLqM+u2337j15pvYf9+ebNF6Y+rVqsZmrVrQv9+xTPrqq0xXL6vNnzePf553Nvvu0ZWtN2tB80a12WHrzTj68IN57913CpQ3M54b8RQH7L07W7VuTqtmDenWqT3Drr6ceXPnlv8FlBFJSR/JmNl0M/s0Pl8IfAO0AA4BHo7FHubvma+H
AI9YMA5oIKlZsnN4kHUZ9b87buOC84Yydeqv7LtvL848eyjt2+/IM089SdfOHXl3dMFg4YI5c2bzxKMPUaduXQ44uDennnEO3XfrwccffcBhB+7D7bfkny900fnnMHjgcfz++2/0PvxI+g88iVq1a3HDv69hv726s3jx4gxdSelUUvIH0FjSJwmPQtcnlLQpYXnwj4CmZjYdQiAGNozFWpB/+a1pcVuRSrIygnNlpuPOnXjznffo1q17vu3PjniG4445irPOOJWJX36Todplt1abtubHabOpUiX/P+MZM6azZ9ed+fc1l3PCSadQq1YtZsyYzn1330mbtlvx9tiPqVmz5pryQ07qzzNPPsGLI0dwzHH9y/sySiXM+Cq2S2C2mXVMehypDvAscLaZLUjSAi5sR9IUsN6SdRnV+9DDCgRYgMOPOJI2bdvy3bffMnv27AzULPtVrly5QIAF2GijZuy8y64sWbKEP2ZMB2Dar1MwM7p0654vwAL03KcXAHPmzEl/pcucqKTkj2KPIFUlBNjHzey5uPmPvG6A+HNm3D4N2Djh7S2B35Md34Osy1pVq1YFKDSQuKLN/fNPPp0wnrr16tGiZYgHm23ehmrVqvHB2DEsWbIkX/m33ngNgC6F/LHLBaXJJxtXf7kf+GatfCwvAnnN+v7ACwnbj4+jDDoD8/O6FYriv70uK306YQJfT5rETh060qBBg0xXJ6vNmjWTB+69i9WrV/PHjOn83ysvMW/eXG69816qVasGwAaNGvHPf13JFf+6kK47t2Of/fanWrXqjPtwLN998zXX/fdmduqwc4avZN3ljS4oha5AP+BLSRPjtouA64CnJQ0CfgWOjPteBfYHfgD+AgYWdwIPsi7rLFq0iMGDBiCJa4b9O9PVyXqzZ83kP8OuWvO6dp063HbX/RzZ59h85c44+x80btyY8845nfvuvnPN9kMOPYI9e+5bbvUta6WJsWY2lqKH0xaY9RqX4DptXc7h3QUuqyxfvpy+Rx/JpElfccmll9Njjz0zXaWst/U22zF74QpmzF3C+M+/5cTBp3LqSQO47OIL8pUbdtVlDD1zCBddeiVffj+Fn3+bw/ARL/LZp5+w7x5d+fGHyRm6gpLLm/GV62t8OVd+RpOMAAAYPUlEQVQuVq5cyXHH9uGN1/+Ps875Bxddcmmmq5RTqlSpQuvNNudfV1zDoMFDuOPWG3l/7HsAjH77LW64/loGDzmDU884h2bNmlO3Xj323rcXd9//CHPn/sl//311hq+gZFTMf5nmQdZlhZUrV3L8ccfw0gvPM+S0M7ju+v9muko5bfc9egLw4ftjABj15usAdOm2W4GyHTt1pkaNGnz5+eflV8EyVNrRBWmvX6YrkAskDZDUPNP1qKhWrVrFCQP6MfLZEZw0+BRuvPnWTFcp582YEUYVVakcbrssX74MgD/nFBwOt2jhQpYuXUq16tXKr4JlJOSTLXYyQkZV+CArqUqy1ykaAGR9kC3htWXU6tWrOemEATzz1JMMGDiIW26/s/g3OQC+/GIiCxcsKLD9t2lTueWG6wHosVdo0e68y64A3HXHLSxatChf+evjTbMuXQu2crNeMa3YbGjJ5sw/SknHA+cSZld8YWb9JLUCHgCaALOAgWb2q6SHgD8JU+Q+lbSQECQ3BWZL6kcYotEDqA7cYWZ3x/OcTxjSsRp4DfgE6Ag8LmkJsKuZrRloKOkkQqKIaoRhHf3M7K9YhwXxvRsB55vZiDiw+SmgHuHzHwI0Azqb2dCYBegsM9tM0ubAw2bWTVIH4EbCkkCzgQFmNl3SaOADwlCUF4EbyuQDLyfXXHUFw594jAYNGtCseXOuueqKAmVOP/NsH8ZViCcff4THHnmQbt17sEmrVlSrVp0pv/zMm6+/yrJlyzjznPNov2MHAHofdiT3330n4z8eR+edtmWf/fandu06fDTufT79ZDzNW7TkzKHnZfiKSibzYTS5nAiykrYFLga6mtls
SRvEXbcTkjU8LOkE4Fb+TuTQFuhpZqskXQ50ALqZ2ZI4d3m+me0sqTrwvqQ3gK3i+3eJgXIDM/tT0unAuWb2SSHVe87M7o31vBoYBNwW9zUDusXjvgiMAI4FXjezayRVBmoBk4G83/DuwBxJLeJ7x8QZKbcBh5jZLEl9gGuAE+J7GpjZ7oV8boMJfwDYeJNNUvmoy92vU6YAMG/ePIZdc1WhZfodP8CDbCEOOuQwFsyfz/iPP+L9se+ydMkSGjVuwp4996X/CSfRc5/91pStUqUKz770OnfceiMvPf8sTw9/jFWrVtGi5cacePKpDD3/IjbcsGkGr6ZkciGfbE4EWWBPYISZzYZ8CcV3BQ6Lzx8Frk94zzNmtirh9YsJLdB9gHaSjoiv6xNSl/UEHjSzv9Y6TzLbxeDagNDKfD1h3/Nmthr4WlLeb/B44IEYOJ83s4nAQkl1JNUlTNl7AtiNEHCfA7YEtgPejHOqKwOJs0yeKqxiZnYPcA9Ahw4dk86vzpR7H3iIex94KNPVyEmdu3Sjc5duKZevVasW5114CeddeEkaa5UB2R1jc6ZPVhSThCFKLLN2SqHE1wLOMLP28dHazN5Yh/Mkegg43cy2B64AaiTsW7bWOTGz9wgB9Dfg0dgNAvAhYfbId8AYQoDdFXg/vndSQn23N7N9klyrc+uNbO+TzZUgOwo4SlIjCFnL4/YPgKPj877A2BSP9zowJLYmkdRWUm3gDeAESbXWOs9CoG4Rx6oLTI/H6lvciWM/8szYxXA/kJdZ/T1Cn/N7wGfAHsAyM5tPCLxNJO0aj1E1dqE4t94rzcoI5SEnugvMbJKka4B3Ja0iBKEBwJmEr97nEW98pXjI+wg3wT6NCSJmAb3N7P8ktQc+kbScME/5IkJr9a7CbnwB/yLkn5wCfEnRwThPD+A8SSuARUBeS3YMoavgvdiPPBX4Nl7/8ti1cauk+oT/bzcDk1K8XucqJEHWL6SoMBXXVXQdOnS09z8q7L6dK6nFS1dmugoVTuO6VScUl/s10TbtdrRHX3w3aZmOreuv0zHLWk60ZJ1zrihZ3pD1IOucy2XZkZ8gGQ+yzrmclTetNpt5kHXO5TYPss45lz7ZMBY2GQ+yzrmclt0hNncmIzjnXEEK42STPYo9hPSApJmSvkrYtoGkNyVNjj8bxu2SdKukHyR9IWmnoo8ceJB1zuWsMBmh5KvVRg8B+6217UJglJm1Icw4vTBu70XIc9KGkHzpf8Ud3IOscy6nlTbIxnwiayeDOgR4OD5/mL+z+x1CyPxnZjYOaBDTlxbJg6xzLqelsMZXY0mfJDwGp3DYpmY2HSD+3DBubwFMTSg3LW4rkt/4cs7ltBTGyc4uw2m1hZ0taW4Cb8k653JbetJw/ZHXDRB/zozbpxESOeVpCfye7EAeZJ1zOUtKWz7ZF4H+8Xl/4IWE7cfHUQadCSusTC/sAHm8u8A5l9NKO05W0nBCCtLGkqYBlxHWAHxa0iDgV+DIWPxVYH/Cen5/kUJ6VQ+yzrkcltpY2GTM7Jgidu1VSFkDTluX43uQdc7ltCyfVetB1jmXu/ImI2QzD7LOuZzm+WSdcy6NPJ+sc86lS+r5CTLGg6xzLmflwmq1HmSdczktu0OsB1nnXI7zlRGccy6dsjvGepB1zuWukLsg07VIzoOscy6n+ThZ55xLoyzvkvUg65zLbR5knXMuTUSpcsaWC0/a7ZxzaeQtWedcTsv2lqwHWedc7vLcBc45lz6eT9Y559LMx8k651wa+Ywv55xLpywPsgqLL7qKTtIsYEqm65GixsDsTFeigsmVz7SVmTVJtbCk/yNcWzKzzWy/0lWr5DzIuqwj6RMz65jpelQk/plmjk9GcM65NPIg65xzaeRB1mWjezJdgQrIP9MM8T5Z55xLI2/JOudcGnmQdc65NPIg65xzaeRB1jlA0raZrkMuknSQpA6Zrkc28yDr1nuS+gG3S6orZXtOp+whaXfgVOD7TNcl
m3nuArdek9QVOA7oZ2YLJVUGVmW4WllP0kHAicCrZrYw0/XJZt6SdestSTWA7YC2wLEAZrbKW7Mp+RaoDnSU1DDTlclmPk7WrZcktQSWmNkcSf2BrsA7ZjY87pf5P44CJB0KLATmAD8Dw4F3gbvMbF4m65atvCXr1juSziXMgHpR0r+A0cAHwG6SBgB4gC1I0lnAUKA1MAJoAZwNdAf+Ial+BquXtTzIuvVK7EvsaWb7A5OATmY2BRgJfA7sJKleJuuYbSRVktQK2MvMuhOC6yRgspl9B5wJbI3f4ymUdxe49YqkPYEmhH7YbsBBZrZcUltgMlDPzOZnso7ZRlIdYBnwEKGLoB3Qx8yWSDoeeB74y8xWZq6W2ctbsm69IOkASdsBS4ELgA5ArxhgTwNuB2p6gP2bgp2BF+KmhcDJwFExwPYHTgdqeYAtmjfv3fqiA/AvM+ss6VVge+BISZsA/YBjzOyvjNYwy8R+6fGSPgN6AdcB1YBRkkbHbceb2YzM1TL7eXeBq9AkHQ18ZWZfSboKWGxm10k6G6hP6Dq43cy+zWhFs0z83I4GBgB9geZmdnHcdwIwndAn+0PGKpkjvCXrKiRJVeJX2GHAAklvA+OAVpI2MbObY7nKZuaTDwqqR+izPh5YARwjabGZXWtmD2S2arnFg6yrqLYDJhJaYt2AecB5hPsQmxD6EgFWZ6Jy2UpSF2AD4D5gf6AN8BQwCzhL0sdm9lYGq5hz/MaXq3Ak1Qaek3Q+UAtoBbxO+No7AThAUiOfcJCfpA2BzQljX88ALgIaEEYUnA2MBb7OWAVzlPfJugpF0hHAN4SW65mEmUm9gKqEqbO/AXV9dlJ+ks4ElgMPE6bLDidMnd0OGG5mD0iqYWZLM1jNnOQtWVdhSDoPOIfQePgNuAJ4DfgQ6EJIBGMeYPOTNITwB+g1M1sSP5+DgXcIXYqXSqpOGCvr1pH3yboKIeaDPdDMukqqKWkPYFMzexD4UtIM4AUz8z7YBDFJzt7AxcBfkgYThreNMbOnJb0EbGRmHmBLyLsLXIUgaWPCzKMJhFSFDYDdgdvMbFgm65ZNEvuh855LupAQaJcCXxFGE1Q1swsyWNUKw1uyLqdJ2hWYaWY/xq+9A4B7zGyipL7ARn6D628JAbYPUF/S+Dhu+D3gJzObIek4YJCkWj5Bo/Q8yLqcJekU4CzCONingMfM7NSEfacR5th7gE0Q8w1cAjwOvCvpWDN7WVINSYMI/dp9PMCWDQ+yLidJOgToAWwLdCJMje0fp8wuAA4lTJX1IUcJJPUgfG69zezrOGX2CUlHm9mrkmoBR/gMuLLjowtczpHUCDgM2MnMVpvZOOAJYFPgCECEIPJV5mqZXWK6wkrAfoQbW11jd8CLhFEXL0vaw8xu8wBbtjzIupxjZnOAK4FvJd0Wp8a+T0gkXZ+Qn2BJRiuZfRrFkRX/JMzgag90klQtBtqDCPkIXBnz0QUuZ8ThRbWAKmb2X0ltgAuBxcA5cX2umh5g85N0KnAgMB4YH/tfLwKaAy8Co81seSbrWJF5S9blhDgjqR/wHnCxpFvMbDJwLdCMkIYPwjAkF8WMWccQZr91Ac6RNMjMrgXmA/vi92bSyj9cl/XiGNjuhK+0gwiB9gBJdc3sBEkXAEvA1+ZKJKkuUBvoTZjRVQm4Fzgxjmq7WFIjH0WQXt5d4HJCXHa6A3ClmXWR1IHw9fc/Pmi+aHHpmDrA3WZ2SNz2NvALMNSnGKeft2RdTjCzuZJWAJ/FTa0IuQmGZ65W2WPtCRd5eXLNbJGkDYCtJbUjrDQ7G7jAA2z58CDrsoakqkBLM/s5BoSpZjY3ocgSoKGkx4FdgL3N7OdM1DWbxM9tG+DzOFtrtJlNy9tvZr9Kuhu4i/Bv/kQzm5WZ2q5/vLvAZQVJInQHHAw0BTYEjjOzxWuV25aQtX+mmf1Y7hXNQnG57usIfa7tgD3NbHrcl5irYENgqZktyFhl10PeknVZISYqmUII
tLsQhmQthjUBWHHiwaRM1jMbmdkUSaOAG4BrzGx64rI6kirFz25mZmu6fvKWrMsakuoTlorZGWgIvG5mr8Z9njC6CJLyhmGtIuQdGAE8bmZ/Saq99rcBV768JeuygqSzgMOBnsCnwFFAb0lzCN0DTSQ96flg85M0lDDRYIiZfSdpOfAvQm7YTYDOko4wsxUZreh6zIOsyzhJRxLmz/eJM4+mSxpJyGt6ObA1sI8H2PwkbU9IhHOgmc2P3QJvS1oN9AFaApd6gM0sD7IuG9QB7jKzn/K+3sY74vcALxO6bKdmuI7ZqApghG4CCIlxAD40s9E+xTg7+LRalw1WEZabrp9ws2sgsKuZ/eoBNj9JG0iqQlhF9itgtzj7bVUcwnWNpOoeYLOD3/hyGSGpP7AJIfPTW4S8BB0Ja011BP4BHOlp9/KT9A9gR8Lqu/fG562ADYBJQH/gYP/csod3F7hyJ+lsQh6CO4Hzgbrx+VDgasI3rKM9UOQXl4zZ38z2kvQFoZ/6fEmdCeNj6wMHmdl3Ga2oy8dbsq5cSaoGXG9mZ8fMWvvHR9W8FVElVfWbNQVJGgDMI6QoPJiQmHyppA3M7E9fyyw7eUvWlRtJBwIfA40kjQH+BA4ws9WS+kn6CXjHA2x+kk4m/FudCZwLLCS0aFfHlWabx28HHmCzkN/4cuVC0t6EroEFwMNANeDJeLOmP6EP9hdvieUnaXfgAEL/61hgLvAmYfmY44CjCRm2Vvtnl528u8ClnaQdgNeBwWb2oqQWhMX8zge+AbYC+vqU2fwk7QIMBLYws55xWydCV8GmhNbt1b6WWXbzIOvSKmbmHwvcCGxkZh0T9jUmjJH9y+fV5xdb91sQ8r7uD7wEPB2nytaIfbG1POF29vM+WZc2sQ+2H/AqYRXZeyW9ABwWc53OJuQ2dQkk7UxYzeCI2J1SiTBUa7WkZ/LGv3qAzQ3eJ+vSIqYkHAY8b2YzgOXAqcAs4K0YOFwCBW0JycgbENYuA3gQ+ALYHTgkQ9VzJeS/6C5dZhD6YU+W1C7emFlIGAv7NWEYkktgwffAfwg3CHeX1MTMVgKPAO8C72Syjm7deZ+sK1OS9iAMip9IWMngKEJmrX+Z2RexjI/nXIukEwmrG6wEbiL0x55K6IsdZWZ/ZLB6rhS8JevKTJxccCvh7vdjwD7A28Bo4JbYheAryq5F0iDgROB9QpCdQMhLcDdhiNZu3r2Su/zGlysTcVjWgUBPM/tD0qGEmzeTgfsISWDmZ7CK2WxLwqq7zwLPSpoNPGZmPeIIjA88zWPu8r+OrqzMABYRVjbAzEYCPwKnxr7Y2xMX93Nrbg4CVAY6Jey6C/hFUjUzG2Fmv5d/7VxZ8SDrSkXSGXFKZ23gQ6BtHEQP8AMwL2+NqYxVMgtJGgJcL6kGcDvQT9IFkmoBhxESldfNZB1d2fAbX67EJA0m9CX2NbPJkloDpxFS760m3Mjpm3fDywWSDgauIqQknBK3bUEYqjUZ2BY4wWfAVQweZF2JSXoAeNDMxiTMQmpKWASxDfC5mf2a2VpmH0mnABuY2bUxK5mZ2Yr4HKCumc3JYBVdGfLuAlcikqoCGxJarRDW44Iwp/47M3vJA2yRpgDdJW1pZstjgO1PyEi23ANsxeJB1qVMUt4aUsR0hPcSljrpGad/9gXuJwRfV7T3CcO0+ks6UNIxwOnAl5mtlksH7y5wSUmqDFQ2s+WSmuYNis+bUBCz9V9HWEJmR6C/9yUWT1IzwhTZgwlD24Z533XF5EHWFSm2XPvGlxsDewP7AisTJxRI2pwwu2t1zFPgUpTXDxuXQncVkAdZl5SkloQ589UIfYZfrLXfp8g6l4T3yboixQA6DbiHMLGge5yB5JxLkbdkXZEk7UlYuG8RYUbX08BoM7tO0lHAt96P6FxynrvAFSomezke+JQwt/4B4DjgMUltCEt675a5GjqXG7wl6wqQ1BwYARxuZtMlbUMIshcCkwgzkn7ycbDOFc/7
ZF1RlgKLAczsa+AJoIOZzTKz0R5gnUuNB1m3RuwGIGZ9+h54NmF3HWDzuESKCnu/c64g75N1AEg6HThT0jjgNcIyMTdI+oywEOIhhIX9vH/JuXXgQdblZYVqB/QC9iTkNq1nZkPiirOVgYfMbHIGq+lcTvIbX+u5uKLBh8BbZnZCnIF0GLAr8Atwty897VzJeZ/ses7MfgPOBvaTdHSc3vk0YejWhkD1TNbPuVzn3QUOM3tO0jJgmCTM7ElJjwK149IxzrkS8iDrADCzVyStBu6RtNLMRgAeYJ0rJe+TdflI2hv40cx+ynRdnKsIPMg651wa+Y0v55xLIw+yzjmXRh5knXMujTzIOudcGnmQdc65NPIg63KGpFWSJkr6StIzkmqV4lg9JL0cnx8s6cIkZRtIOrUE57hc0rmpbl+rzEOSjliHc20q6at1raNLPw+yLpcsMbP2ZrYdsBw4JXFnzMK4zr/TZvaimV2XpEgDYJ2DrHPgQdblrjHAFrEF942kOwn5FjaWtI+kDyV9Glu8dQAk7SfpW0ljCUlwiNsHSLo9Pm8qaaSkz+OjC3AdIZfuREn/ieXOkzRe0heSrkg41sWSvpP0FmHZnqQknRSP87mkZ9dqnfeUNEbS9zEbGpIqS/pPwrlPLu0H6dLLg6zLOZKqENIyfhk3bQk8YmY7ElZzuAToaWY7AZ8AQyXVAO4lrE3WHdioiMPfCrxrZjsAOxGW27mQMAuuvZmdJ2kfoA0hJWR7oIOk3SR1AI4GdiQE8Z1TuJznzGzneL5vgEEJ+zYFdgcOAO6K1zAImG9mO8fjnySpdQrncRniuQtcLqkpaWJ8Pga4H2gOTDGzcXF7Z2Ab4P24gEM1QirHrYCf83LiSnoMGFzIOfYkLCCJma0C5ktquFaZfeLjs/i6DiHo1gVG5qWGlPRiCte0naSrCV0SdYDXE/Y9bWargcmSforXsA/QLqG/tn489/cpnMtlgAdZl0uWmFn7xA0xkC5O3AS8aWbHrFWuPVBWc8gFDDOzu9c6x9klOMdDQG8z+1zSAKBHwr61j2Xx3GeYWWIwRtKm63heV068u8BVNOOArpK2AJBUS1Jb4FugtaTNY7ljinj/KGBIfG9lSfUI2cjqJpR5HTghoa+3haQNgfeAQyXVlFSX0DVRnLrAdElVgb5r7TtSUqVY582A7+K5h8TySGorqXYK53EZ4i1ZV6GY2azYIhwuKS/h+CVm9r2kwcArkmYDY4HtCjnEWYR0j4OAVcAQM/tQ0vtxiNRrsV92a+DD2JJeBBxnZp9KegqYCEwhdGkU51/AR7H8l+QP5t8B7wJNgVPMbKmk+wh9tZ8qnHwW0Du1T8dlgmfhcs65NPLuAuecSyMPss45l0YeZJ1zLo08yDrnXBp5kHXOuTTyIOucc2nkQdY559Lo/wGzhOA4k0NSuQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 360x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot_confusion_matrix(y_test, y_pred, ['not correct answer', 'correct answer'], figsize=(5, 4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    828\n",
       "True      40\n",
       "Name: isAnswer, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_test.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred_Series = pd.Series(y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True     657\n",
       "False    211\n",
       "dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_pred_Series.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Seems like the classifier is heavily biased towards predicting correct answers (657 words predicted as answers vs. only 40 labeled as answers in the test set). But as I found during the *Data exploration*, there are a lot more answer-worthy words that are just not labeled, since, I guess, the Mechanical Turk workers had the job to label just 5. So, who knows, maybe I did some black magic and managed to extract all the answer-worthy words!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Saving the model\n",
    "We'll save our predictor, so we can use it to label new words."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictorPickleName = '../data/pickles/nb-predictor.pkl'\n",
    "dumpPickle(predictorPickleName, predictor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
